import os, shutil, pathlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.callbacks import TensorBoard
from keras.models import Sequential, Model, load_model
from keras.layers import Flatten, Dense, Dropout, RandomFlip, RandomRotation
from tensorflow.keras.layers import Embedding
from keras.preprocessing import sequence
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.applications import ResNet50
from keras.layers import RandomZoom, GlobalAveragePooling2D
from tensorflow.keras.regularizers import l2
# [TensorFlow startup log] 2024-05-14 11:43:33.768097: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
from PIL import Image
# Image geometry, RNG seed and batch size shared by the rest of the script
# (presumably the model input size and loader settings -- confirm at use site).
img_width, img_height = 224, 224
random_seed = 123
batch_size = 32

# Project root; the train/test dataset folders live directly underneath it.
working_dir = '/Users/austensteinberg/Desktop/Machine Learning/ML_Final/'
geo_train = pathlib.Path(working_dir, 'train_dataset')
geo_test = pathlib.Path(working_dir, 'test_dataset')
def extract_meta(path):
    """Collect metadata for every file located directly inside *path*.

    Args:
        path: A ``pathlib.Path`` pointing at a folder of images. The folder's
            own name is recorded as the ``country`` value of each entry.

    Returns:
        A list with one dict per file, holding the keys ``country``,
        ``image_name``, ``width``, ``height``, ``size`` (bytes on disk) and
        ``path``. The list is empty when the folder contains no files. It can
        be passed straight to ``pd.DataFrame``.
    """
    meta = []
    for file_path in path.glob('*'):
        # glob('*') also yields sub-directories; those cannot be opened as images.
        if not file_path.is_file():
            continue
        # Open inside a context manager so the file handle is released promptly.
        with Image.open(file_path) as img:
            width, height = img.size
        meta.append({
            'country': path.name,
            'image_name': file_path.name,
            'width': width,
            'height': height,
            'size': file_path.stat().st_size,
            'path': file_path,
        })
    return meta
import os
import pandas as pd
# Root folder holding one sub-folder of images per country.
# NOTE(review): this path has a space before "/ML_Final" that working_dir does
# not -- confirm which spelling matches the directory on disk.
folder_path = '/Users/austensteinberg/Desktop/Machine Learning /ML_Final/compressed_dataset'

# Column-oriented accumulator: two parallel lists, one entry per image found.
all_metadata = {'country': [], 'image_name': []}

for country_folder in os.listdir(folder_path):
    country_path = os.path.join(folder_path, country_folder)
    # Only directories are treated as country folders; skip anything else.
    if not os.path.isdir(country_path):
        continue
    # Keep only .jpg / .png entries of this country folder.
    image_files = [f for f in os.listdir(country_path) if f.endswith(('.jpg', '.png'))]
    # Record the country once per image, keeping both lists aligned.
    all_metadata['country'].extend([country_folder] * len(image_files))
    all_metadata['image_name'].extend(image_files)

# One row per image: (country, image_name).
df_geo_data = pd.DataFrame(all_metadata)

# Number of images per country, exposed as a 'frequency' column.
images_per_country = df_geo_data.groupby('country')['image_name'].count()
df_data_distribution = images_per_country.reset_index().rename(columns={'image_name': 'frequency'})

# Display the per-country distribution.
print(df_data_distribution)
# Output:
#             country  frequency
# 0             Aland          9
# 1           Albania         41
# 2    American Samoa         16
# 3           Andorra         13
# 4        Antarctica          1
# ..              ...        ...
# 119  United Kingdom       2484
# 120   United States      12014
# 121         Uruguay         57
# 122       Venezuela          1
# 123         Vietnam         15
#
# [124 rows x 2 columns]
# Display the per-image table (one row per country / image_name pair).
print(df_geo_data)
# Output:
#         country             image_name
# 0        Bhutan  canvas_1629262074.jpg
# 1        Bhutan  canvas_1629527767.jpg
# 2        Bhutan  canvas_1629551780.jpg
# 3        Bhutan  canvas_1629992395.jpg
# 4        Bhutan  canvas_1629687800.jpg
# ...         ...                    ...
# 49992   Iceland  canvas_1629971749.jpg
# 49993   Iceland  canvas_1629503620.jpg
# 49994   Iceland  canvas_1629521495.jpg
# 49995  Paraguay  canvas_1629908268.jpg
# 49996  Paraguay  canvas_1630216530.jpg
#
# [49997 rows x 2 columns]
# Print a summary of the metadata table (columns, non-null counts, dtypes, memory).
df_geo_data.info()
# Output:
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 49997 entries, 0 to 49996
# Data columns (total 2 columns):
#  #   Column      Non-Null Count  Dtype
# ---  ------      --------------  -----
#  0   country     49997 non-null  object
#  1   image_name  49997 non-null  object
# dtypes: object(2)
# memory usage: 781.3+ KB
import random
from PIL import Image
import plotly.graph_objects as go
# Sample one row of the per-image table and take its country. Because the
# table has one row per image, countries with more images are picked more often.
random_country = df_geo_data.sample(1)
country_name = random_country['country'].iloc[0]

# Build the folder path from the *sampled* country. The previous code joined
# the leftover loop variable `country_folder` (the last folder iterated) and
# called .format() on a string without placeholders, so the image shown never
# matched the country named in the annotation.
country_path = os.path.join(folder_path, country_name)
image_files = [f for f in os.listdir(country_path) if f.endswith(('.jpg', '.png'))]
random_image = random.choice(image_files)
image_path = os.path.join(country_path, random_image)

# Open the image using PIL
img = Image.open(image_path)

# Create a plotly figure to display the image
fig = go.Figure()
fig.add_trace(go.Image(z=img))
fig.update_layout(
    title="Image from the training dataset",
    template="plotly_white",
    width=900,
    height=450,
)
fig.update_xaxes(title_text="Width")
fig.update_yaxes(title_text="Height")

# Label the figure with the sampled country, placed just above the plot area
# (paper coordinates: x centered, y slightly beyond the top edge).
fig.add_annotation(
    x=0.5,
    y=1.1,
    xref="paper",
    yref="paper",
    text="Country : {}".format(country_name),
    showarrow=False,
)
fig.show()